Setup packages

# devtools::install_github("wilkelab/ungeviz")

library(tidyverse)

packages <- c("ggplot2", "lubridate", "magrittr", "forcats", "actogrammr", "ungeviz", "devtools", "behavr", "ggetho", "damr", "scopr", "sleepr", "zeitgebr", "momentuHMM", "ggpubr", "oce", "tictoc", "amt", "terra")
walk(packages, require, character.only = T)

Import data

activity_files <- paste0("data/", list.files(path = "data", pattern = "2021")) # writes .csv file names to chr vector - 2021 is common across all the datasets of interest
all_activity <- map(activity_files, read.csv, skip = 4) # reads csv files

Clean data

tags <- c("A05","A06","A07","A08","A09","A10","A11","A12","A13","A14")

for (i in 1:length(all_activity)) {
  all_activity[[i]]$ID <- tags[i] # add ID column
  all_activity[[i]]$DateTimeGMT <- as.POSIXct(all_activity[[i]]$GMT.Time, format = "%d/%m/%Y %I:%M:%S %p", tz = "UTC") # create POSIXct time
  all_activity[[i]]$DateTimeNZ <- with_tz(all_activity[[i]]$DateTimeGMT, "Pacific/Auckland")
  all_activity[[i]]$Temp <- all_activity[[i]]$Temperature..C. # rename temperature
  all_activity[[i]]$Temperature..C. <- NULL # remove untidy column
}

names(all_activity) <- tags
all_activity_df <- bind_rows(all_activity, .id = "ID") # bind all activity data into one dataframe

Exploratory data analysis

It appears there are quite clearly two distinct activity states, one with a higher ODBA and one with a lower ODBA.

for(i in 1:length(all_activity)) {

  # histogram of ODBA
  print(all_activity[[i]] |>  ggplot(aes(ODBA)) +
      geom_histogram(binwidth = 5, alpha = 1, fill = "orange") +
      ggtitle(paste0("Kākā ID: ", tags[i])) +
      scale_x_continuous("Overall Dynamic Body Acceleration (ODBA)") +
      theme_bw())
  
}

The log of ODBA also indicates a possible 3rd state.

for(i in 1:length(all_activity)) {

  # histogram of log(ODBA)
  print(all_activity[[i]] |>  ggplot(aes(log(ODBA))) +
    geom_histogram(binwidth = 0.025, alpha = 1, fill = "orange") +
      ggtitle(paste0("Kākā ID: ", tags[i])) +
      scale_x_continuous("log of Overall Dynamic Body Acceleration (ODBA)") +
    theme_bw())
  
}

Prepare data for HMM

# preparing as a single data frame
all_prepped <- prepData(data = all_activity_df, coordNames = NULL)
head(all_prepped, 10)
##     ID              GMT.Time ODBA         DateTimeGMT          DateTimeNZ Temp
## 1  A05 22/09/2020 1:18:00 am  246 2020-09-22 01:18:00 2020-09-22 13:18:00 20.0
## 2  A05 22/09/2020 1:19:00 am  146 2020-09-22 01:19:00 2020-09-22 13:19:00 19.5
## 3  A05 22/09/2020 1:20:00 am  140 2020-09-22 01:20:00 2020-09-22 13:20:00 19.5
## 4  A05 22/09/2020 1:21:00 am  128 2020-09-22 01:21:00 2020-09-22 13:21:00 19.0
## 5  A05 22/09/2020 1:22:00 am   37 2020-09-22 01:22:00 2020-09-22 13:22:00 19.0
## 6  A05 22/09/2020 1:23:00 am  105 2020-09-22 01:23:00 2020-09-22 13:23:00 19.0
## 7  A05 22/09/2020 1:24:00 am  336 2020-09-22 01:24:00 2020-09-22 13:24:00 19.0
## 8  A05 22/09/2020 1:25:00 am  404 2020-09-22 01:25:00 2020-09-22 13:25:00 19.0
## 9  A05 22/09/2020 1:26:00 am  246 2020-09-22 01:26:00 2020-09-22 13:26:00 19.0
## 10 A05 22/09/2020 1:27:00 am  418 2020-09-22 01:27:00 2020-09-22 13:27:00 18.5
# preparing as a list
all_prepped <- vector(mode = "list", length = length(all_activity))

for (i in 1:length(all_activity)) {
  all_prepped[[i]] <- prepData(data = all_activity[[i]], coordNames = NULL)
}

Testing distributions to provide intiial parameter estimates in the hidden Markov models

# test <- rweibull(10000, shape = .4, scale = 7)
# hist(test, breaks = 1000, xlim = c(1,100)) # state 1 minimum

# test <- log(rweibull(10000, shape = .5, scale = 8))
# hist(test, breaks = 1000) # looks appropriate for rest state

test <- log(rweibull(10000, shape = 1, scale = 8))
hist(test, breaks = 100) # looks appropriate for rest state - used in model

# test <- rweibull(10000, shape = 1, scale = 9)
# hist(test, breaks = 1000, xlim = c(1,100)) # state 1 maximum


# test <- rweibull(10000, shape = 1.5, scale = 150)
# hist(test, breaks = 100) # looks appropriate for active state

test <- rweibull(10000, shape = 2, scale = 200)
hist(test, breaks = 100) # looks appropriate for active state - used in model

# test <- rweibull(10000, shape = 2.5, scale = 250)
# hist(test, breaks = 100) # looks appropriate for active state
stateNames <- c("active", "inactive")
dist <- list(ODBA = "weibull")

# parameters of weibull distributions to fit to data
# STATE 1 - shape = 1, scale = 8 
# STATE 2 - shape = 2, scale = 250
Par0 <- list(ODBA = c(1, 2, 8, 250)) # initial parameter estimates

Fit HMM to all individuals iteratively

all_hmms <- vector(mode = "list", length = length(all_activity))

for(i in 1:length(all_activity)) {
  
  tic()
  
  all_hmms[[i]] <- fitHMM(all_prepped[[i]],
                    nbStates = 2,
                    dist = dist,
                    Par0 = Par0,
                    stateNames = stateNames) 
  toc()
  
  plot(all_hmms[[i]], plotCI = T, breaks = 50, ask = FALSE)

  states <- viterbi(all_hmms[[i]]) # fit viterbi algorithm
  table(states)/nrow(all_prepped[[i]]) # proportion of time spent in states
  all_prepped[[i]]$states <- as.factor(states) # add states column to DF
  
}
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE
## 37.84 sec elapsed
## Decoding state sequence... DONE
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE

## 60.19 sec elapsed
## Decoding state sequence... DONE
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE

## 72.88 sec elapsed
## Decoding state sequence... DONE
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE

## 54.35 sec elapsed
## Decoding state sequence... DONE
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE

## 30.09 sec elapsed
## Decoding state sequence... DONE
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE

## 30.83 sec elapsed
## Decoding state sequence... DONE
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE

## 60 sec elapsed
## Decoding state sequence... DONE
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE

## 63.38 sec elapsed
## Decoding state sequence... DONE
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE

## 49.32 sec elapsed
## Decoding state sequence... DONE
## =======================================================================
## Fitting HMM with 2 states and 1 data stream
## -----------------------------------------------------------------------
##  ODBA ~ weibull(shape=~1, scale=~1)
## 
##  Transition probability matrix formula: ~1
## 
##  Initial distribution formula: ~1
## =======================================================================
## DONE

## 30.52 sec elapsed
## Decoding state sequence... DONE

Organise the results and additional information into a data frame for plotting and further analysis

all_prepped_df <- bind_rows(all_prepped) # bind all activity data into one dataframe

all_prepped_df_nested <- all_prepped_df |> group_by(ID) |> nest()
all_prepped_df_nested$sex <- c("m", "m", "m", "f", "f", "f", "f", "m", "m", "f")
all_prepped_df_nested$age <- c(1, 10, 5, 1, 3, 2, 2, 3, 10, 8)
all_prepped_df_nested$origin <- c("o", "o", "c", "o", "o", "o", "o", "c", "c","o")

# estimate the sun's azimuth and angle from the latitude, longitude and time
sun_df <- data.frame(oce::sunAngle(all_prepped_df$DateTimeNZ, latitude = -45.77813, longitude = 170.604312))

all_prepped_df <- all_prepped_df_nested |> unnest(cols = c(data)) |> ungroup() |> 
  mutate(ID = factor(ID), 
         states = factor(states), 
         sex = factor(sex), 
         origin = factor(origin),
         year = lubridate::year(DateTimeNZ),
         month = lubridate::month(DateTimeNZ),
         month_chr = lubridate::month(DateTimeNZ, label = TRUE, abbr = FALSE),
         month_year = lubridate::floor_date(DateTimeNZ, unit = "month"),
         day = lubridate::day(DateTimeNZ),
         yday = as.numeric(round(difftime(DateTimeNZ, min(DateTimeNZ), units = "days"))),
         time = hms::as_hms(DateTimeNZ),
         sun_azimuth = sun_df$azimuth,
         sun_altitude = sun_df$altitude) 

head(all_prepped_df, 10)
## # A tibble: 10 × 19
##    ID    GMT.Time      ODBA DateTimeGMT         DateTimeNZ           Temp states
##    <fct> <chr>        <int> <dttm>              <dttm>              <dbl> <fct> 
##  1 A05   22/09/2020 …   246 2020-09-22 01:18:00 2020-09-22 13:18:00  20   2     
##  2 A05   22/09/2020 …   146 2020-09-22 01:19:00 2020-09-22 13:19:00  19.5 2     
##  3 A05   22/09/2020 …   140 2020-09-22 01:20:00 2020-09-22 13:20:00  19.5 2     
##  4 A05   22/09/2020 …   128 2020-09-22 01:21:00 2020-09-22 13:21:00  19   2     
##  5 A05   22/09/2020 …    37 2020-09-22 01:22:00 2020-09-22 13:22:00  19   2     
##  6 A05   22/09/2020 …   105 2020-09-22 01:23:00 2020-09-22 13:23:00  19   2     
##  7 A05   22/09/2020 …   336 2020-09-22 01:24:00 2020-09-22 13:24:00  19   2     
##  8 A05   22/09/2020 …   404 2020-09-22 01:25:00 2020-09-22 13:25:00  19   2     
##  9 A05   22/09/2020 …   246 2020-09-22 01:26:00 2020-09-22 13:26:00  19   2     
## 10 A05   22/09/2020 …   418 2020-09-22 01:27:00 2020-09-22 13:27:00  18.5 2     
## # ℹ 12 more variables: sex <fct>, age <dbl>, origin <fct>, year <dbl>,
## #   month <dbl>, month_chr <ord>, month_year <dttm>, day <int>, yday <dbl>,
## #   time <time>, sun_azimuth <dbl>, sun_altitude <dbl>
write_csv(all_prepped_df, paste0("outputs/data_files/all_prepped_df_", Sys.Date(), ".csv"))

Estimate the proportion of time spent in each state

prop <- vector(mode = "list", length = length(all_activity))

for (i in 1:length(all_activity)) {
  
prop[[i]] <- all_prepped[[i]] |> group_by(states) |> summarise(n = n()) %$% n

}

names(prop) <- tags
prop_df <- data.frame(do.call(rbind, prop)) |> mutate(id = tags)
colnames(prop_df) <- c("inactive", "active", "id")
prop_df <- prop_df |> mutate(prop_inactive = inactive/(inactive + active), prop_active = active/(inactive + active))

write_csv(prop_df, paste0("outputs/data_files/minutes_active_inactive_", Sys.Date(), ".csv"))

prop_df_long <- prop_df |> pivot_longer(cols = !"id", values_to = "value")

Plotting

Actograms

Colouring by ODBA

Showing a single day

i = 8

all_prepped_df %>% filter(ID == tags[i] & month == 9 & day == 1) %>% # 
  ggplot() +
  geom_point(aes(x = DateTimeNZ, y = ODBA), colour = "black", size = 0.75, alpha = 0.75) +
  # scale_colour_gradient(low = "dark blue", high = "white") +
  scale_colour_viridis_c() +
  scale_x_datetime("Time", date_breaks = "4 hours", date_labels = "%H:%M") +
  ggtitle(paste0("Kākā ID: ", tags[i])) +
  theme_classic() +
  theme(legend.position = "none")

ggsave(paste0("outputs/plots/scatter_singleday_ODBA_id_", tags[i], "_", Sys.Date(), ".png"), width=300, height=170, units="mm", dpi = 300)

all_prepped_df %>% filter(ID == tags[i] & month == 9 & day == 1) %>% # 
  ggplot(aes(x = DateTimeNZ, y = day)) +
  geom_vpline(aes(colour = log(ODBA)), height = 1) +
  # scale_colour_gradient(low = "dark blue", high = "white") +
  scale_colour_viridis_c() +
  scale_x_datetime("Time", date_breaks = "4 hours", date_labels = "%H:%M") +
  ggtitle(paste0("Kākā ID: ", tags[i])) +
  theme_classic() +
  theme(legend.position = "none")
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggsave(paste0("outputs/plots/actogram_singleday_logODBA_id_", tags[i], "_", Sys.Date(), ".png"), width=300, height=170, units="mm", dpi = 300)

Showing all days for all individuals

for(i in 1:length(tags)) {

  print(all_prepped_df %>% filter(ID == tags[i] & month %in% c(9:12, 1:2)) %>% # 
          ggplot(aes(x = time, y = day)) +
          geom_vpline(aes(colour = log(ODBA)), height = 1) +
          # scale_colour_gradient(low = "dark blue", high = "white") +
          scale_colour_viridis_c() +
          scale_y_reverse() +
          facet_wrap(~ month_year) +
          ggtitle(paste0("Kākā ID: ", tags[i])) +
          theme_bw() +
          theme(legend.position = "none"))
  
  ggsave(paste0("outputs/plots/actogram_logODBA_id_", tags[i], "_", Sys.Date(), ".png"), width=300, height=170, units="mm", dpi = 300)
  
}

Colouring by state

for(i in 1:length(tags)) {

  print(all_prepped_df %>% filter(ID == tags[i] & month %in% c(9:12, 1:2)) %>% # 
          ggplot(aes(x = time, y = day)) +
          geom_vpline(aes(colour = states), height = 1) +
          # scale_colour_gradient(low = "dark blue", high = "white") +
          scale_colour_viridis_d() +
          scale_y_reverse() +
          facet_wrap(~ month_year) +
          ggtitle(paste0("Kākā ID: ", tags[i])) +
          theme_bw() +
          theme(legend.position = "none"))
  
}

Single day

i = 8 # select individual - ID 45512

sept_day_id12 <- all_prepped_df %>% filter(ID == tags[i] & month == 9 & day == 1)

# unique(diff(sign(jan_day_id12$sun_altitude))) # this function finds when the sun altitude changes sign (i.e. sunrise and sunset)

sept_plot <- sept_day_id12 %>%
  
  ggplot() +
  # adding a rectangle for nautical twilight
  geom_rect(aes(xmin = DateTimeNZ[which(diff(sign(sun_altitude - -12)) > 0)], 
                xmax = DateTimeNZ[which(diff(sign(sun_altitude)) > 0)], ymin = -Inf, ymax = Inf), fill = "skyblue", alpha = 0.01) +
  geom_rect(aes(xmin = DateTimeNZ[which(diff(sign(sun_altitude - -12)) < 0)], 
                xmax = DateTimeNZ[which(diff(sign(sun_altitude)) < 0)], ymin = -Inf, ymax = Inf), fill = "skyblue", alpha = 0.01) +
  geom_point(aes(x = DateTimeNZ, y = ODBA, colour = states), size = 0.75, alpha = 0.5) +
  geom_line(aes(x = DateTimeNZ, y = 3*sun_altitude)) +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  # vertical line at time when sun rises
  geom_vline(xintercept = sept_day_id12$DateTimeNZ[which(diff(sign(sept_day_id12$sun_altitude)) > 0)], linetype = "dashed") +
  # vertical line at time when sun sets
  geom_vline(xintercept = sept_day_id12$DateTimeNZ[which(diff(sign(sept_day_id12$sun_altitude)) < 0)], linetype = "dashed") +
  scale_y_continuous(limits = c(min(3*sept_day_id12$sun_altitude), 750)) +
  # scale_colour_viridis_d() +
  ggtitle(paste0("Kākā ID: ", tags[i])) +
  theme_classic() +
  theme(legend.position = "none")

sept_plot
## Warning: Removed 1 rows containing missing values (`geom_point()`).

jan_day_id12 <- all_prepped_df %>% filter(ID == tags[i] & month == 1 & day == 1)

jan_plot <- jan_day_id12 %>%
  
  ggplot() +
  # adding a rectangle for nautical twilight
  geom_rect(aes(xmin = DateTimeNZ[which(diff(sign(sun_altitude - -12)) > 0)], 
                xmax = DateTimeNZ[which(diff(sign(sun_altitude)) > 0)], ymin = -Inf, ymax = Inf), fill = "skyblue", alpha = 0.01) +
    geom_rect(aes(xmin = DateTimeNZ[which(diff(sign(sun_altitude - -12)) < 0)], 
                xmax = DateTimeNZ[which(diff(sign(sun_altitude)) < 0)], ymin = -Inf, ymax = Inf), fill = "skyblue", alpha = 0.01) +
    geom_point(aes(x = DateTimeNZ, y = ODBA, colour = states), size = 0.75, alpha = 0.5) +
  geom_line(aes(x = DateTimeNZ, y = 3*sun_altitude)) +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  # vertical line at time when sun rises
  geom_vline(xintercept = jan_day_id12$DateTimeNZ[which(diff(sign(jan_day_id12$sun_altitude)) > 0)], linetype = "dashed") +
  # vertical line at time when sun sets
  geom_vline(xintercept = jan_day_id12$DateTimeNZ[which(diff(sign(jan_day_id12$sun_altitude)) < 0)], linetype = "dashed") +
  scale_y_continuous(limits = c(min(3*sept_day_id12$sun_altitude), 750)) +
  # scale_colour_viridis_d() +
  # ggtitle(paste0("Kākā ID: ", tags[i])) +
  theme_classic() +
  theme(legend.position = "none")

jan_plot

ggarrange(sept_plot + rremove("xlab"), jan_plot, ncol = 1, nrow = 2)
## Warning: Removed 1 rows containing missing values (`geom_point()`).

ggsave(paste0("outputs/plots/HMM_45512_Sept_Jan_ggarranged_", Sys.Date(), ".png"), width=300, height=170, units="mm", dpi = 300)

Determining the active time per day per individual

active_time_day_id <- all_prepped_df |> 
  group_by(ID, yday) |> 
  summarise(inactive_time = sum(states == 1), active_time = sum(states == 2), total = n()) |> 
  filter(total > 1400 & yday > 3 & yday < 180) |> 
  ungroup() |> 
  mutate(active_prop = active_time/total) 
## `summarise()` has grouped output by 'ID'. You can override using the `.groups`
## argument.
active_time_day_id_nested <- active_time_day_id %>% group_by(ID) %>% nest()
active_time_day_id_nested$sex <- c("m", "m", "m", "f", "f", "f", "f", "m", "m", "f")
active_time_day_id_nested$age <- c(1, 10, 5, 1, 3, 2, 2, 3, 10, 8)
active_time_day_id_nested$origin <- c("o", "o", "c", "o", "o", "o", "o", "c", "c","o")

active_time_day_id <- active_time_day_id_nested %>% unnest(data)

Plotting active time per day for a single individual

active_time_day_id |> filter(ID == "A12") |> ggplot() +
  geom_hline(yintercept = 480, linetype = "dashed") + 
  geom_hline(yintercept = 960, linetype = "dashed") +
  geom_smooth(aes(x = yday, y = inactive_time, colour = ID), fill = "orange", span = 0.5, alpha = 0.15, method = "loess", se = TRUE) +
  geom_point(aes(x = yday, y = inactive_time, colour = ID), colour = "orange", alpha = 0.85) +
  geom_smooth(aes(x = yday, y = active_time, colour = ID), fill = "skyblue", span = 0.5, alpha = 0.15, method = "loess", se = TRUE) +
  geom_point(aes(x = yday, y = active_time, colour = ID), colour = "skyblue", alpha = 0.85) +
  scale_colour_viridis_d() +
  scale_x_continuous("Day since start of study", breaks = seq(0,180, 30)) +
  scale_y_continuous("Number of minutes") +
  ggtitle("Active time per day") +
  theme_classic() +
  theme(legend.position = "none",
    plot.title = element_text(size = 25),        # Increase plot title size
    axis.title = element_text(size = 20),        # Increase axis title size
    axis.text = element_text(size = 16),         # Increase axis text size
    legend.title = element_text(size = 20),      # Increase legend title size
    legend.text = element_text(size = 16)        # Increase legend text size
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggsave(paste0("outputs/plots/active_time_id_12_", Sys.Date(), ".png"), width=300, height=170, units="mm", dpi = 300, scale = 1)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Plotting active time per day per individual

active_time_day_id |> ggplot() +
  geom_hline(yintercept = 480, linetype = "dashed") + 
  geom_hline(yintercept = 960, linetype = "dashed") +
  geom_smooth(aes(x = yday, y = inactive_time, colour = ID), fill = "orange", span = 0.5, alpha = 0.15, method = "loess", se = TRUE) +
  geom_point(aes(x = yday, y = inactive_time, colour = ID), alpha = 0.25) +
  geom_smooth(aes(x = yday, y = active_time, colour = ID), fill = "skyblue", span = 0.5, alpha = 0.15, method = "loess", se = TRUE) +
  geom_point(aes(x = yday, y = active_time, colour = ID), alpha = 0.25) +
  scale_colour_viridis_d() +
  scale_x_continuous("Day since start of study", breaks = seq(0,180, 30)) +
  scale_y_continuous("Number of minutes") +
  ggtitle("Active time per day per individual") +
  theme_classic() +
  theme(legend.position = "none",
    plot.title = element_text(size = 25),        # Increase plot title size
    axis.title = element_text(size = 20),        # Increase axis title size
    axis.text = element_text(size = 16),         # Increase axis text size
    legend.title = element_text(size = 20),      # Increase legend title size
    legend.text = element_text(size = 16)        # Increase legend text size
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggsave(paste0("outputs/plots/active_time_all_ids_", Sys.Date(), ".png"), width=300, height=170, units="mm", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Just males

active_time_day_id |> filter(sex == "m") |> ggplot() +
  geom_hline(yintercept = 480, linetype = "dashed") + 
  geom_hline(yintercept = 960, linetype = "dashed") +
  geom_smooth(aes(x = yday, y = inactive_time, colour = ID), fill = "orange", span = 0.5, alpha = 0.15, method = "loess", se = TRUE) +
  geom_point(aes(x = yday, y = inactive_time, colour = ID), alpha = 0.25) +
  geom_smooth(aes(x = yday, y = active_time, colour = ID), fill = "skyblue", span = 0.5, alpha = 0.15, method = "loess", se = TRUE) +
  geom_point(aes(x = yday, y = active_time, colour = ID), alpha = 0.25) +
  scale_colour_viridis_d() +
  scale_x_continuous("Day since start of study", breaks = seq(0,180, 30)) +
  scale_y_continuous("Number of minutes") +
  ggtitle("Active time per day per individual - males only (n = 5)") +
  theme_classic() +
  theme(legend.position = "none",
    plot.title = element_text(size = 25),        # Increase plot title size
    axis.title = element_text(size = 20),        # Increase axis title size
    axis.text = element_text(size = 16),         # Increase axis text size
    legend.title = element_text(size = 20),      # Increase legend title size
    legend.text = element_text(size = 16)        # Increase legend text size
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggsave(paste0("outputs/plots/active_time_males_", Sys.Date(), ".png"), width=300, height=170, units="mm", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Just females

active_time_day_id |> filter(sex == "f") |> ggplot() +
  geom_hline(yintercept = 480, linetype = "dashed") + 
  geom_hline(yintercept = 960, linetype = "dashed") +
  geom_smooth(aes(x = yday, y = inactive_time, colour = ID), fill = "orange", span = 0.5, alpha = 0.15, method = "loess", se = TRUE) +
  geom_point(aes(x = yday, y = inactive_time, colour = ID), alpha = 0.25) +
  geom_smooth(aes(x = yday, y = active_time, colour = ID), fill = "skyblue", span = 0.5, alpha = 0.15, method = "loess", se = TRUE) +
  geom_point(aes(x = yday, y = active_time, colour = ID), alpha = 0.25) +
  scale_colour_viridis_d() +
  scale_x_continuous("Day since start of study", breaks = seq(0,180, 30)) +
  scale_y_continuous("Number of minutes") +
  ggtitle("Active time per day per individual - females only (n = 5)") +
  theme_classic() +
  theme(legend.position = "none",
    plot.title = element_text(size = 25),        # Increase plot title size
    axis.title = element_text(size = 20),        # Increase axis title size
    axis.text = element_text(size = 16),         # Increase axis text size
    legend.title = element_text(size = 20),      # Increase legend title size
    legend.text = element_text(size = 16)        # Increase legend text size
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggsave(paste0("outputs/plots/active_time_females_", Sys.Date(), ".png"), width=300, height=170, units="mm", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Plotting average active time per day per individual - by age

active_avg_age <- active_time_day_id |> ggplot() +
  # geom_hline(yintercept = 480, linetype = "dashed") + 
  geom_hline(yintercept = 960, linetype = "dashed") +
  geom_boxplot(aes(x = factor(age), y = active_time, group = ID, fill = age), alpha = 0.75) +
  scale_y_continuous("Number of minutes", limits = c(0,1440)) +
  scale_x_discrete("Age") +
  ggtitle("Active time") +
  scale_fill_viridis_c() +
  theme_classic() +
  theme(legend.position = "none",
    plot.title = element_text(size = 25),        # Increase plot title size
    axis.title = element_text(size = 20),        # Increase axis title size
    axis.text = element_text(size = 16),         # Increase axis text size
    legend.title = element_text(size = 20),      # Increase legend title size
    legend.text = element_text(size = 16)        # Increase legend text size
  )

active_avg_age

Plotting average active time per day per individual - by sex

active_avg_sex <- active_time_day_id |> ggplot() +
  # geom_hline(yintercept = 480, linetype = "dashed") + 
  geom_hline(yintercept = 960, linetype = "dashed") +
  geom_boxplot(aes(x = factor(sex), y = active_time, group = ID, fill = sex), alpha = 0.75) +
  scale_y_continuous("Number of minutes", limits = c(0,1440)) +
  scale_x_discrete("Sex") +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none",
    plot.title = element_text(size = 25),        # Increase plot title size
    axis.title = element_text(size = 20),        # Increase axis title size
    axis.text = element_text(size = 16),         # Increase axis text size
    legend.title = element_text(size = 20),      # Increase legend title size
    legend.text = element_text(size = 16)        # Increase legend text size
  )

active_avg_sex

ggarrange(active_avg_age, active_avg_sex + rremove("ylab"), ncol = 2, nrow = 1, align = "hv")

ggsave(paste0("outputs/plots/active_time_avg_age_sex_", Sys.Date(), ".png"), width=300, height=170, units="mm", dpi = 300)

Assessing spatial activity

Import GPS data

gps_files <- paste0("data/", list.files(path = "data", pattern = "speed")) # writes .csv file names to chr vector - 2021 is common across all the datasets of interest
gps_list <- map(gps_files, read.csv) # reads csv files

names(gps_list) <- as.character(unique(all_prepped_df$ID))

gps_all_df <- bind_rows(gps_list) 
gps_all_nested <- gps_all_df %>% nest(data = -id)
gps_all_nested$ID <- as.character(unique(all_prepped_df$ID))

gps_all_df <- gps_all_nested |> unnest(cols = c(data)) |> mutate(X.1 = NULL, # remove a redundant index variable
                                           DateTimeNZ = with_tz(DateTime, "Pacific/Auckland"),
                                           DateTimeNZ_R = as.POSIXct(round.POSIXt(DateTimeNZ, units = "mins"))
                                           )

gps_all_with_states_df <- left_join(gps_all_df, all_prepped_df, by = join_by(DateTimeNZ_R == DateTimeNZ, ID == ID))

Nest the data and make tracks

gps_all_nested_tracks <- gps_all_with_states_df %>% nest(data = -id) %>% 
  mutate(trk = map(data, function(d) {
    make_track(d, lon, lat, DateTimeNZ, crs = 4326, all_cols = TRUE) %>% 
      transform_coords(2193)
  }))

# unnest to find the min and max values for the extent of the tracks
extent <- gps_all_nested_tracks %>% unnest(trk) %>% 
  summarise(xmin = min(x_), xmax = max(x_), ymin = min(y_), ymax = max(y_))

# create a template raster for the KDEs
template_raster <- rast(xmin = extent$xmin, xmax = extent$xmax, ymin = extent$ymin, ymax = extent$ymax, res = 50, crs = crs("epsg:2193"))
for(i in 1:length(gps_all_nested_tracks$trk)) {

# kde_full <- hr_kde(gps_all_nested_tracks$trk[[i]], trast = template_raster, levels = c(0.5, 0.75, 0.95))
# plot(kde_full)
# kde_full_isopleths <- hr_isopleths(kde_full, levels = c(0.5, 0.75, 0.95))
# 
# ggplot() +
#   geom_sf(data = kde_full_isopleths, fill = "skyblue", alpha = 0.25)

kde_inactive <- hr_kde(gps_all_nested_tracks$trk[[i]] |> filter(states == 1), trast = template_raster, levels = c(0.5, 0.75, 0.95))
# plot(kde_inactive)
kde_inactive_isopleths <- hr_isopleths(kde_inactive, levels = c(0.5, 0.75, 0.95))

kde_active <- hr_kde(gps_all_nested_tracks$trk[[i]] |> filter(states == 2), trast = template_raster, levels = c(0.5, 0.75, 0.95))
# plot(kde_active)
kde_active_isopleths <- hr_isopleths(kde_active, levels = c(0.5, 0.75, 0.95))

print(ggplot() +
  geom_sf(data = kde_inactive_isopleths, fill = "skyblue", alpha = 0.25) +
  geom_sf(data = kde_active_isopleths, fill = "orange", alpha = 0.25) +
  theme_bw())

print(hr_overlap(kde_inactive, kde_active, type = "ba"))

}

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.995

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.967

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.978

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.991

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.988

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.974

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.971

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.955

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.953

## # A tibble: 1 × 2
##   levels overlap
##    <dbl>   <dbl>
## 1      1   0.965